# Select the first CRAN mirror non-interactively so any install.packages()
# call during knitting never prompts for a mirror.
chooseCRANmirror(graphics=FALSE, ind=1)
# Echo source code in every knitr chunk by default.
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse) # data manipulation and graphs
## -- Attaching packages -------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.1 v purrr 0.3.2
## v tibble 2.1.3 v dplyr 0.8.1
## v tidyr 0.8.3 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.5.3
## Warning: package 'tidyr' was built under R version 3.5.3
## Warning: package 'readr' was built under R version 3.5.3
## Warning: package 'purrr' was built under R version 3.5.3
## Warning: package 'dplyr' was built under R version 3.5.3
## Warning: package 'stringr' was built under R version 3.5.3
## Warning: package 'forcats' was built under R version 3.5.3
## -- Conflicts ----------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(stringr) # string manipulation
library(lubridate) # date manipulation
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library('wordcloud') # wordcloud
## Warning: package 'wordcloud' was built under R version 3.5.3
## Loading required package: RColorBrewer
library(tidytext) # tidy implementation of NLP methods
## Warning: package 'tidytext' was built under R version 3.5.3
library(DT) # table format display of data
## Warning: package 'DT' was built under R version 3.5.3
library(leaflet) # maps
## Warning: package 'leaflet' was built under R version 3.5.3
library(igraph) # graphs
## Warning: package 'igraph' was built under R version 3.5.3
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:lubridate':
##
## %--%, union
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:purrr':
##
## compose, simplify
## The following object is masked from 'package:tidyr':
##
## crossing
## The following object is masked from 'package:tibble':
##
## as_data_frame
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(ggraph) # graphs
## Warning: package 'ggraph' was built under R version 3.5.3
library(topicmodels) # for LDA topic modelling
## Warning: package 'topicmodels' was built under R version 3.5.3
library(tm) # general text mining functions, making document term matrixes
## Warning: package 'tm' was built under R version 3.5.3
## Loading required package: NLP
## Warning: package 'NLP' was built under R version 3.5.2
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
library(SnowballC) # for stemming
## Warning: package 'SnowballC' was built under R version 3.5.2
library(textcat)
## Warning: package 'textcat' was built under R version 3.5.3
library(dplyr)
# Plot fill colors reused throughout the report.
fillColor <- "#FFA07A"
fillColor2 <- "#F1C40F"
# NOTE(review): the original started with rm(list = ls()). Clearing the
# global environment mid-script is discouraged (it would also discard the
# knitr setup above), so that call has been removed.
business_raw <- read.csv(file = "yelp_business.csv", header = TRUE, sep = ",")
business <- business_raw
# Keep only Nevada and Arizona businesses for the rest of the analysis.
target <- c("NV", "AZ")
business <- business %>% filter(state %in% target)
datatable(head(business), style = "bootstrap", class = "table-condensed",
          options = list(dom = 'tp', scrollX = TRUE))
## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
#Most Popular Categories
The most popular categories of business are plotted in the bar plot
# Split the semicolon-delimited category strings into one row per category,
# then plot the ten most frequent categories.
categories <- as.data.frame(unlist(str_split(business$categories, ";")))
colnames(categories) <- c("Name")
categories %>%
  group_by(Name) %>%
  summarise(Count = n()) %>%
  arrange(desc(Count)) %>%
  ungroup() %>%
  mutate(Name = reorder(Name, Count)) %>%
  head(10) %>%
  ggplot(aes(x = Name, y = Count)) +
  geom_bar(stat = 'identity', colour = "white", fill = fillColor2) +
  # paste0() has no `sep` argument: the original's sep = "" was silently
  # concatenated as one more (empty) string, so it is dropped here.
  geom_text(aes(x = Name, y = 1, label = paste0("(", Count, ")")),
            hjust = 0, vjust = .5, size = 4, colour = 'black',
            fontface = 'bold') +
  labs(x = 'Name of Category', y = 'Count',
       title = 'Top 10 Categories of Business') +
  coord_flip() +
  theme_bw()
We show the top ten cities which have the most businesses mentioned in Yelp.
# Top ten cities by number of Yelp-listed businesses.
business %>%
  group_by(city) %>%
  summarise(Count = n()) %>%
  arrange(desc(Count)) %>%
  ungroup() %>%
  mutate(City = reorder(city, Count)) %>%
  head(10) %>%
  ggplot(aes(x = City, y = Count)) +
  geom_bar(stat = 'identity', colour = "white", fill = fillColor) +
  # Counts are labelled in thousands. paste0() has no `sep` argument: the
  # original's sep = "" was silently pasted as an empty string, so dropped.
  geom_text(aes(x = City, y = 1, label = paste0("(", round(Count / 1e3), " K )")),
            hjust = 0, vjust = .5, size = 4, colour = 'black',
            fontface = 'bold') +
  labs(x = 'City', y = 'Count of Reviews',
       title = 'Top Ten Cities with the most Business parties in Yelp') +
  coord_flip() +
  theme_bw()
#Map of the business parties in Nevada
It seems from the map that most of the business is in the neighborhood of The Strip in Las Vegas.
From Wikipedia
The Las Vegas Strip is a stretch of South Las Vegas Boulevard in Clark County, Nevada that is known for its concentration of resort hotels and casinos. The Strip is approximately 4.2 miles (6.8 km) in length,[1] located immediately south of the Las Vegas city limits in the unincorporated towns of Paradise and Winchester.
# Leaflet map of the Nevada businesses, centered on their median location.
# (Typo fixed: the original variable was spelled "NavadaCoords"; it is only
# used inside these lines, so the rename is self-contained.)
NevadaCoords <- business %>% filter(state == "NV")
center_lon <- median(NevadaCoords$longitude, na.rm = TRUE)
center_lat <- median(NevadaCoords$latitude, na.rm = TRUE)
leaflet(NevadaCoords) %>%
  addProviderTiles("Esri.NatGeoWorldMap") %>%
  # circle radius grows with the square root of the review count
  addCircles(lng = ~longitude, lat = ~latitude, radius = ~sqrt(review_count)) %>%
  # controls
  setView(lng = center_lon, lat = center_lat, zoom = 13)
# Same map for Arizona: businesses plotted on a Leaflet basemap, centered on
# the median business location, circle radius ~ sqrt(review count).
az_coords <- business %>% filter(state == "AZ")
center_lon <- median(az_coords$longitude, na.rm = TRUE)
center_lat <- median(az_coords$latitude, na.rm = TRUE)
az_map <- leaflet(az_coords) %>%
  addProviderTiles("Esri.NatGeoWorldMap") %>%
  addCircles(lng = ~longitude, lat = ~latitude, radius = ~sqrt(review_count)) %>%
  setView(lng = center_lon, lat = center_lat, zoom = 13)
az_map
# Load the raw review data and preview the first rows in a DataTable.
reviews_raw <- read.csv("yelp_review.csv", header = TRUE, sep = ",")
reviews <- reviews_raw
datatable(
  head(reviews),
  style = "bootstrap",
  class = "table-condensed",
  options = list(dom = 'tp', scrollX = TRUE)
)
A glimpse of the reviews data
# Column-wise preview of the reviews table (structure captured below:
# ~5.26M rows, 9 variables).
glimpse(reviews)
## Observations: 5,261,668
## Variables: 9
## $ review_id <fct> vkVSCC7xljjrAI4UGfnKEQ, n6QzIUObkYshz4dz2QRJTw, MV...
## $ user_id <fct> bv2nCi5Qv5vroFiqKGopiw, bv2nCi5Qv5vroFiqKGopiw, bv...
## $ business_id <fct> AEx2SYEUJmTxVVB18LlCwA, VR6GpWIda3SfvPC-lg9H3w, CK...
## $ stars <int> 5, 5, 5, 4, 4, 5, 4, 4, 3, 5, 4, 3, 1, 3, 5, 4, 1,...
## $ date <fct> 2016-05-28, 2016-05-28, 2016-05-28, 2016-05-28, 20...
## $ text <fct> "Super simple place but amazing nonetheless. It's ...
## $ useful <int> 0, 0, 0, 0, 0, 1, 0, 1, 1, 3, 1, 5, 9, 2, 2, 0, 0,...
## $ funny <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,...
## $ cool <int> 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0,...
Detecting the language of the first Ten reviews.
# Detect the language of the first ten review texts with textcat
# (all ten are English, per the captured output).
textcat(reviews[1:10,]$text)
## [1] "english" "english" "english" "english" "english" "english" "english"
## [8] "english" "english" "english"
The following plot shows the names of the businesses with the most five-star reviews. Mon Ami Gabi and Bacchanal Buffet are the two most popular restaurants from the Yelp reviews with five-star ratings. We will do a deep dive into these restaurants.
# Ten businesses with the most five-star reviews, joined to their metadata.
most5StarsReviews <- reviews %>%
  filter(stars == 5) %>%
  group_by(business_id) %>%
  summarise(Count = n()) %>%
  arrange(desc(Count)) %>%
  ungroup() %>%
  mutate(BusinessID = reorder(business_id, Count)) %>%
  head(10)
# Make the join key explicit instead of relying on the implicit-key message
# (the original printed: ## Joining, by = "business_id").
most5StarsReviews <- inner_join(most5StarsReviews, business, by = "business_id")
# Bar chart of the businesses with the most five-star reviews.
most5StarsReviews %>%
  mutate(name = reorder(name, Count)) %>%
  ggplot(aes(x = name, y = Count)) +
  geom_bar(stat = 'identity', colour = "white", fill = fillColor) +
  # paste0() has no `sep` argument; the original's stray sep = "" was
  # silently concatenated as an empty string, so it is dropped here.
  geom_text(aes(x = name, y = 1, label = paste0("(", Count, ")")),
            hjust = 0, vjust = .5, size = 4, colour = 'black',
            fontface = 'bold') +
  labs(x = 'Name of the Business',
       y = 'Count',
       title = 'Name of the Business and Count') +
  coord_flip() +
  theme_bw()
The location and category of the most liked business Mon Ami Gabi is shown below
# Location and category details for Mon Ami Gabi, the most-liked business.
mon_ami_gabi_id <- "4JNXUYY8wbaaDmk3BPzlWw"
mon_ami_gabi <- business %>%
  filter(business_id == mon_ami_gabi_id) %>%
  select(name, neighborhood, city, state, postal_code, categories)
datatable(
  head(mon_ami_gabi),
  style = "bootstrap",
  class = "table-condensed",
  options = list(dom = 'tp', scrollX = TRUE)
)
##Useful,funny,cool reviews
The following plots describe the number of useful, funny, and cool reviews. Most of the reviews are NOT useful, funny, or cool.
# Reviews for Mon Ami Gabi only.
mon_ami_gabi_reviews <- reviews %>%
  filter(business_id == "4JNXUYY8wbaaDmk3BPzlWw")
# Helper: horizontal bar chart of the ten most frequent vote counts for one
# vote column (useful/funny/cool). Factors out three near-identical plot
# pipelines. Also drops the stray sep = "" the original passed to paste0()
# (paste0 has no `sep` argument; it was silently pasted as an empty string).
plot_vote_counts <- function(votes, axis_label, chart_title, bar_fill) {
  tibble(value = votes) %>%
    group_by(value) %>%
    summarise(Count = n()) %>%
    arrange(desc(Count)) %>%
    ungroup() %>%
    mutate(value = reorder(value, Count)) %>%
    head(10) %>%
    ggplot(aes(x = value, y = Count)) +
    geom_bar(stat = 'identity', colour = "white", fill = bar_fill) +
    geom_text(aes(x = value, y = 1, label = paste0("(", Count, ")")),
              hjust = 0, vjust = .5, size = 4, colour = 'black',
              fontface = 'bold') +
    labs(x = axis_label, y = 'Count', title = chart_title) +
    coord_flip() +
    theme_bw()
}
plot_vote_counts(mon_ami_gabi_reviews$useful, 'Useful Reviews',
                 'Useful Reviews and Count', fillColor)
plot_vote_counts(mon_ami_gabi_reviews$funny, 'Funny Reviews',
                 'Funny Reviews and Count', fillColor2)
plot_vote_counts(mon_ami_gabi_reviews$cool, 'Cool Reviews',
                 'Cool Reviews and Count', fillColor)
A word cloud is a graphical representation of frequently used words in the text. The height of each word in this picture is an indication of frequency of occurrence of the word in the entire text. The words steak, service, vegas,french,patio,bellagio,delicious, nice are the words which have been used very frequently in the reviews.Note that if we choose a word which is not food related , it is Service and we will see in the subsequent sections of sentiment analysis and topic modelling , why this keyword is important.
#createWordCloud = function(train)
#{
# train %>%
# unnest_tokens(word, text) %>%
# filter(!word %in% stop_words$word) %>%
# count(word,sort = TRUE) %>%
# ungroup() %>%
# head(30) %>%
# with(wordcloud(word, n, max.words = 30,colors=brewer.pal(8, "Dark2")))
#}
#createWordCloud(reviews %>%
# filter(business_id == "4JNXUYY8wbaaDmk3BPzlWw"))
library(gdata)
## gdata: Unable to locate valid perl interpreter
## gdata:
## gdata: read.xls() will be unable to read Excel XLS and XLSX files
## gdata: unless the 'perl=' argument is used to specify the location
## gdata: of a valid perl intrpreter.
## gdata:
## gdata: (To avoid display of this message in the future, please
## gdata: ensure perl is installed and available on the executable
## gdata: search path.)
## gdata: Unable to load perl libaries needed by read.xls()
## gdata: to support 'XLX' (Excel 97-2004) files.
##
## gdata: Unable to load perl libaries needed by read.xls()
## gdata: to support 'XLSX' (Excel 2007+) files.
##
## gdata: Run the function 'installXLSXsupport()'
## gdata: to automatically download and install the perl
## gdata: libaries needed to support Excel XLS and XLSX formats.
##
## Attaching package: 'gdata'
## The following objects are masked from 'package:dplyr':
##
## combine, first, last
## The following object is masked from 'package:purrr':
##
## keep
## The following object is masked from 'package:stats':
##
## nobs
## The following object is masked from 'package:utils':
##
## object.size
## The following object is masked from 'package:base':
##
## startsWith
library(ggplot2)
library(openintro)
## Please visit openintro.org for free statistics materials
##
## Attaching package: 'openintro'
## The following object is masked from 'package:ggplot2':
##
## diamonds
## The following objects are masked from 'package:datasets':
##
## cars, trees
library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(readxl)
## Warning: package 'readxl' was built under R version 3.5.3
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
library(zipcode)
#1. Read the data
# The first spreadsheet row is a second header ("Zip", "Median", ...), so
# readxl invents ...2/...3/...4 column names; both the invented names and
# the embedded header row are cleaned up in step 2 below.
df <- read_xlsx("MedianZIP.xlsx")
## New names:
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
head(df)
## # A tibble: 6 x 4
## `Data from: http://www.psc.isr.umich.edu/d~ ...2 ...3 ...4
## <chr> <chr> <chr> <chr>
## 1 Zip Median Mean Pop
## 2 1001 56662.5734~ 66687.7508~ 16445
## 3 1002 49853.4176~ 75062.6343~ 28069
## 4 1003 28462 35121 8491
## 5 1005 75423 82442 4798
## 6 1007 79076.3540~ 85801.9750~ 12962
str(df)
## Classes 'tbl_df', 'tbl' and 'data.frame': 32635 obs. of 4 variables:
## $ Data from: http://www.psc.isr.umich.edu/dis/census/Features/tract2zip/: chr "Zip" "1001" "1002" "1003" ...
## $ ...2 : chr "Median" "56662.573499999999" "49853.417699999998" "28462" ...
## $ ...3 : chr "Mean" "66687.750899999999" "75062.634300000005" "35121" ...
## $ ...4 : chr "Pop" "16445" "28069" "8491" ...
# View() opens an interactive data viewer; guard it so knitting and other
# non-interactive runs are unaffected.
if (interactive()) View(df)
#2. Clean up the dataframe
#2a. Drop the embedded header row ("Zip", "Median", "Mean", "Pop")
df <- df[-1, ]
#2b. Give the columns usable names (zip, median, mean, population)
colnames(df) <- c("zip", "median", "mean", "population")
# Strip thousands separators and convert each numeric column in one pass.
for (col in c("median", "mean", "population")) {
  df[[col]] <- as.numeric(gsub(",", "", df[[col]]))
}
## Warning: NAs introduced by coercion
#3. Standardize the ZIP codes using the 'zipcode' package's lookup table
data(zipcode)
df$zip <- clean.zipcodes(df$zip) #reformat the zip codes
#4. Attach city/state/latitude/longitude from the zipcode table
dfNew <- merge(df, zipcode, by = "zip")
head(dfNew)
## zip median mean population city state latitude longitude
## 1 01001 56662.57 66687.75 16445 Agawam MA 42.07061 -72.62029
## 2 01002 49853.42 75062.63 28069 Amherst MA 42.37765 -72.50323
## 3 01003 28462.00 35121.00 8491 Amherst MA 42.36956 -72.63599
## 4 01005 75423.00 82442.00 4798 Barre MA 42.41209 -72.10443
## 5 01007 79076.35 85801.98 12962 Belchertown MA 42.27842 -72.41100
## 6 01008 63980.00 78391.00 1244 Blandford MA 42.17431 -72.94828
#5. Remove Hawaii, Alaska, and DC (the original comment omitted DC even
# though the code dropped it too) — keep only the contiguous lower 48.
dfNew <- dfNew[!dfNew$state %in% c("HI", "AK", "DC"), ]
#Step 2: Show the income & population per state
# 1) Build a simpler data frame with the average median income and the total
#    population for each state.
# 2) Add the state abbreviations and lower-case state names as new columns.
# 3) Show the U.S. map, colored by the average median income of each state.
# 4) Create a second map colored by state population.
# Per-state average median income and total population.
avg_income <- tapply(dfNew$median, dfNew$state, mean)
medianIncome <- data.frame(state = names(avg_income), income = avg_income)
total_pop <- tapply(dfNew$population, dfNew$state, sum)
statePop <- data.frame(state = names(total_pop), pop = total_pop)
dfSimple <- merge(medianIncome, statePop, by = "state")
head(dfSimple)
## state income pop
## 1 AL 40549.90 4770242
## 2 AR 36960.95 2936699
## 3 AZ 48132.06 6360679
## 4 CA 62628.71 36927999
## 5 CO 56303.02 4979279
## 6 CT 78520.16 3548308
# previous steps can be done using sql instead and scaling the income at the state level
# NOTE(review): these two queries OVERWRITE the dfSimple built above with
# tapply/merge. The second query replaces income with income/pop, so every
# downstream map that reads dfSimple$income shows a population-scaled value
# rather than the raw average median income — confirm this is intended,
# since the income map below is still titled "average median income by state".
dfSimple<- sqldf("select state, avg(median) as income, sum(population) as pop from dfNew group by state")
dfSimple<- sqldf("select state, (income/pop) as income, pop from dfSimple")
head(dfSimple)
## state income pop
## 1 AL 0.008500597 4770242
## 2 AR 0.012585883 2936699
## 3 AZ 0.007567126 6360679
## 4 CA 0.001695968 36927999
## 5 CO 0.011307464 4979279
## 6 CT 0.022128902 3548308
# Map each state abbreviation to its full name, lower-cased to match the
# region names used by map_data("state") (e.g. "massachusetts").
dfSimple$stateName <- tolower(state.name[match(dfSimple$state, state.abb)])
# Polygons for the lower-48 states (loads the 'maps' package on first use).
us <- map_data("state")
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
# Theme fragment that strips axis titles, labels, ticks and lines, plus the
# panel border and grid, from the choropleth maps below.
ditch_the_axes <- theme(
  axis.title = element_blank(),
  axis.text = element_blank(),
  axis.ticks = element_blank(),
  axis.line = element_blank(),
  panel.grid = element_blank(),
  panel.border = element_blank()
)
# Choropleth of average median income by state. Columns are referenced by
# bare name inside aes(): using dfSimple$income there is a ggplot2
# anti-pattern (it bypasses the layer's data masking and breaks if the plot
# data is ever subset or faceted).
MapIncome <- ggplot(dfSimple, aes(map_id = stateName)) +
  geom_map(map = us, aes(fill = income)) +
  expand_limits(x = us$long, y = us$lat) +
  coord_map() +
  ggtitle("average median income by state") +
  theme(plot.title = element_text(hjust = 0.5)) +
  guides(fill = guide_legend(title = "Income")) +
  ditch_the_axes
MapIncome
# Second map: color represents the population of each state. As above, the
# fill column is referenced by bare name inside aes() rather than the
# dfSimple$pop anti-pattern.
MapPopulation <- ggplot(dfSimple, aes(map_id = stateName)) +
  geom_map(map = us, aes(fill = pop)) +
  expand_limits(x = us$long, y = us$lat) +
  coord_map() +
  ggtitle("Population by state") +
  theme(plot.title = element_text(hjust = 0.5)) +
  guides(fill = guide_legend(title = "Population")) +
  ditch_the_axes
MapPopulation
# Per-ZIP income scatter over a black basemap. Lower-case state names are
# added to dfNew so map_id can key into map_data("state") regions.
dfNew$stateName <- tolower(state.name[match(dfNew$state, state.abb)])
MapZip <- ggplot(dfNew, aes(map_id = stateName)) +
  geom_map(map = us, fill = "black", color = "white") +
  expand_limits(x = us$long, y = us$lat) +
  # Columns are referenced by bare name inside aes(); the original used the
  # dfNew$longitude / dfNew$latitude / dfNew$median anti-pattern.
  geom_point(data = dfNew, aes(x = longitude, y = latitude, color = median)) +
  coord_map() +
  ggtitle("income per zip code") +
  theme(plot.title = element_text(hjust = 0.5)) +
  ditch_the_axes
MapZip
# Inspect dfNew after the lower-case stateName column was added above.
head(dfNew)
## zip median mean population city state latitude longitude
## 1 01001 56662.57 66687.75 16445 Agawam MA 42.07061 -72.62029
## 2 01002 49853.42 75062.63 28069 Amherst MA 42.37765 -72.50323
## 3 01003 28462.00 35121.00 8491 Amherst MA 42.36956 -72.63599
## 4 01005 75423.00 82442.00 4798 Barre MA 42.41209 -72.10443
## 5 01007 79076.35 85801.98 12962 Belchertown MA 42.27842 -72.41100
## 6 01008 63980.00 78391.00 1244 Blandford MA 42.17431 -72.94828
## stateName
## 1 massachusetts
## 2 massachusetts
## 3 massachusetts
## 4 massachusetts
## 5 massachusetts
## 6 massachusetts
# 2-D density contours of ZIP-code locations over the black U.S. basemap.
MapDensity <- ggplot(dfNew, aes(map_id = stateName)) +
  geom_map(map = us, fill = "black", color = "white") +
  expand_limits(x = us$long, y = us$lat) +
  # Bare column names inside aes() instead of the dfNew$... anti-pattern.
  stat_density_2d(data = dfNew, aes(x = longitude, y = latitude)) +
  coord_map() +
  ggtitle("zip code density") +
  theme(plot.title = element_text(hjust = 0.5))
MapDensity
#Step 5: Zoom in to the region around Nevada
# 1) Repeat steps 3 & 4, but have the map cover the southwest U.S., centered
#    on Las Vegas (the original comment said "northeast", contradicting the
#    Nevada center used here).
# NOTE(review): the "dsk" (Data Science Toolkit) geocoding service has been
# retired; this call may need a different `source` to run today — confirm.
zoomGeo <- geocode("Las Vegas, NV", source = "dsk")
## Source : http://www.datasciencetoolkit.org/maps/api/geocode/json?address=Las+Vegas,+NV
zoomAmount <- 10  # half-width of the zoom window, in degrees
centerx <- zoomGeo$lon
centery <- zoomGeo$lat
ylimit <- c(centery - zoomAmount, centery + zoomAmount)
xlimit <- c(centerx - zoomAmount, centerx + zoomAmount)
#Show income per zip code around Vegas
# MapZip already carries coord_map(), so it is not re-added here; the
# original's extra "+ coord_map()" replaced an identical coordinate system
# and only produced a "Coordinate system already present" warning.
MapZipZoom <- MapZip + xlim(xlimit) + ylim(ylimit)
MapZipZoom <- MapZipZoom + geom_point(aes(x = centerx, y = centery),
                                      color = "darkred", size = 3)
MapZipZoom <- MapZipZoom + ggtitle("Income by Zip around Vegas ") +
  theme(plot.title = element_text(hjust = 0.5))
MapZipZoom
## Warning: Removed 28232 rows containing missing values (geom_point).
# Same zoomed view, centered on Phoenix, AZ.
# NOTE(review): the "dsk" geocoding service has been retired — confirm an
# alternative `source` before re-running.
zoomGeo <- geocode("Phoenix, AZ", source = "dsk")
## Source : http://www.datasciencetoolkit.org/maps/api/geocode/json?address=Phoenix,+AZ
zoomAmount <- 10  # half-width of the zoom window, in degrees
centerx <- zoomGeo$lon
centery <- zoomGeo$lat
ylimit <- c(centery - zoomAmount, centery + zoomAmount)
xlimit <- c(centerx - zoomAmount, centerx + zoomAmount)
#Show income per zip code around Phoenix
# MapZip already carries coord_map(); the original re-added it and triggered
# a "Coordinate system already present" warning, so it is omitted here.
MapZipZoom <- MapZip + xlim(xlimit) + ylim(ylimit)
MapZipZoom <- MapZipZoom + geom_point(aes(x = centerx, y = centery),
                                      color = "darkred", size = 3)
MapZipZoom <- MapZipZoom + ggtitle("Income by Zip around Phoenix ") +
  theme(plot.title = element_text(hjust = 0.5))
MapZipZoom
## Warning: Removed 28735 rows containing missing values (geom_point).
#devtools::install_github("UrbanInstitute/urbnmapr")
#devtools::install_github("UI-Research/urbnthemes")
library(urbnmapr)
library(urbnthemes)
## Setting Windows options...
##
## Attaching package: 'urbnthemes'
## The following objects are masked from 'package:ggplot2':
##
## geom_bar, geom_col, geom_jitter, geom_line, geom_path,
## geom_point, geom_step, geom_text, scale_color_discrete,
## scale_color_gradientn, scale_colour_discrete,
## scale_colour_gradientn, scale_colour_ordinal,
## scale_fill_discrete, scale_fill_gradientn, scale_fill_ordinal
# Join homeownership data onto the sf state geometries. The sf object must
# be the LEFT side of the join so the result keeps its "sf" class:
# left_join(statedata, sf_df) returns a plain data frame, and geom_sf()
# then fails with "no applicable method for 'st_bbox' applied to an object
# of class \"list\"" — exactly the warning the original call produced.
spatial_data <- left_join(get_urbn_map(map = "states", sf = TRUE),
                          statedata,
                          by = "state_name")
ggplot() +
  geom_sf(spatial_data,
          mapping = aes(fill = horate),
          color = "#ffffff", size = 0.25) +
  labs(fill = "Homeownership rate")
# Homeownership-rate map again, this time styled with the Urban Institute
# map defaults and a percent-formatted gradient legend.
set_urbn_defaults(style = "map")
states_sf <- get_urbn_map(map = "states", sf = TRUE)
joined_states <- left_join(statedata, states_sf, by = "state_name")
ggplot(joined_states) +
  geom_sf(mapping = aes(fill = horate),
          color = "#ffffff", size = 0.25) +
  scale_fill_gradientn(labels = scales::percent) +
  labs(fill = "Homeownership rate") +
  coord_sf(datum = NA)